set.seed(42)

# dependencies
library(tidyverse)
library(metafor)
library(knitr)
library(kableExtra)
library(psych)
library(ggExtra)
library(scales) 
#library(ggtext) # devtools::install_github("clauswilke/ggtext")


# function to round all numeric vars in a data frame
round_df <- function(df, n_digits = 3) {
  df %>% mutate_if(is.numeric, round, digits = n_digits)
}

probability_of_superiority_function <- function(x, y) {
  nx <- length(x)
  ny <- length(y)
  rx <- sum(rank(c(x, y))[1:nx])
  A = (rx / nx - (nx + 1) / 2) / ny
  return(A)
}

spearman_brown_correction <- function(r_pearson) {
  inversion <- ifelse(r_pearson < 0, -1, 1)
  val <- round(2*abs(r_pearson)/(1 + abs(r_pearson)), 2)
  return(val*inversion)
}

apa_p_value <- function(p){
  p_formatted <- ifelse(p >= 0.001, paste("=", round(p, 3)),
                        ifelse(p < 0.001, "< .001", NA))
  p_formatted <- gsub(pattern = "0.", replacement = ".", x = p_formatted, fixed = TRUE)
  p_formatted
}

add_heterogeneity_metrics_to_forest <- function(fit) {
  bquote(paste("RE Model (", tau^2, " = ", .(formatC(round(fit$tau2, 1))), 
               ", ", I^2, " = ", .(formatC(round(fit$I2, 1))),
               "%, ", H^2," = ", .(formatC(round(fit$H2, 1))), ")"))
}

# table format in output
options(knitr.table.format = "html") 

# create directory needed to save output
dir.create("models")
dir.create("plots")

Data

data_demographics <- 
  read_csv("../data/effect sizes for meta-analyses/data_demographics.csv")

data_D_scores_internal_consistency_oddeventrials <- 
  read_csv("../data/effect sizes for meta-analyses/data_D_scores_internal_consistency_oddeventrials.csv")

data_D_scores_internal_consistency_firstsecondhalves <- 
  read_csv("../data/effect sizes for meta-analyses/data_D_scores_internal_consistency_firstsecondhalves.csv") 

data_D_scores_internal_consistency_permuted_estimates <- 
  read_csv("../data/effect sizes for meta-analyses/data_D_scores_internal_consistency_permuted_estimates.csv")

data_A_scores_internal_consistency_permuted_estimates <- 
  read_csv("../data/effect sizes for meta-analyses/data_A_scores_internal_consistency_permuted_estimates.csv")

data_D_scores_internal_consistency_permuted_estimates_by_block_order <- 
  read_csv("../data/effect sizes for meta-analyses/data_D_scores_internal_consistency_permuted_estimates_by_block_order.csv") 

data_D_scores_test_retest_icc <- 
  read_csv("../data/effect sizes for meta-analyses/data_D_scores_test_retest_icc.csv") 

Demographics & sample sizes

data_demographics %>%
  summarize(mean_age = mean(age, na.rm = TRUE),
            sd_age = sd(age, na.rm = TRUE)) %>% 
  round_df(1) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
mean_age sd_age
20.3 4.4
data_demographics %>%
  count(gender) %>% 
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
gender n
Female 286
Male 166
Other 1
NA 1216
data_D_scores_test_retest_icc %>%
  summarize(total_trt_n = sum(n)) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
total_trt_n
355
data_D_scores_internal_consistency_permuted_estimates %>%
  summarize(total_ic_n = sum(n)) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
total_ic_n
NA

Internal consistency

Three methods of calculating internal consistency for the Implicit Relational Assessment Procedure (IRAP): - Odd vs even trials (mostly commonly reported in literature) - First vs second half of task (more common for the most popular implicit measure, the IAT; would allow a like-for-like comparison) - Cronbach’s alpha via permutation (most robust)

The IRAP effect is most often quantified using the D1 scoring algorithm (Greenwald, Banaji & Nosek, 2003). i.e., trim all RTs > 10,000 ms, then D1 = (mean RT in the inconsistent blocks - mean RT in the consistent blocks)/SD of RTs in both block types. D1 scores are used throughout. Because of this, it wasn’t possible to use Sam Parson’s splithalf R package. A workflow for permutation-based alpha estimation using D scores is provided below.

In each case, correlations between D scores calculated from the split halves are calculated, and then the Spearman-Brown prophecy formula is applied, i.e., r_sb = 2*r_pearson / (1+r_pearson). Importantly, this correction is applied to the absolute value of the pearson’s correlation, and then its sign is corrected to be congruent with the pearson’s correlation. This ensures that r_sb has a possible range of -1 to +1, as correlations should (whereas correction of native negative correlations allow for a lower limit of -Inf). I haven’t seen this treatment of SB corrections applied to negative correlations well explicated elsewhere, so it is important to note here.

For meta analysis, spearman brown correlations are converted to Fischers r-to-z for meta analysis. Estimates of the meta effect are then converted back for reporting. Heterogeneity metrics represent heterogeneity in Fischer’s r-to-z estimates. See here.

Split-half reliability via odd vs. even trials

Bonett transformations

# fit random Effects model 
fit_oddeven <-  data_D_scores_internal_consistency_oddeventrials %>%
  rma(yi   = yi, 
      vi   = vi, 
      data = .,
      slab = domain)

# make predictions 
predictions_oddeven <-
  predict(fit_oddeven, digits = 5) %>%
  as.data.frame() %>%
  round_df(2)

# plot
metafor::forest(fit_oddeven,
                xlab = "Spearman-Brown correlation",
                addcred = TRUE,
                refline = FALSE,
                transf = transf.iabt,
                xlim = c(-1.4, 1.6),
                at = c(0, 0.25, 0.5, 0.75, 1),
                mlab = add_heterogeneity_metrics_to_forest(fit_oddeven))
text(-1.4, 39, "Study", pos = 4)
text(1.6, 39, "Spearman-Brown correlation [95% CI]", pos = 2)

# summarize results
meta_effect <- 
  paste0("Meta analysis: k = ", fit_oddeven$k, 
         ", r_sb = ",  round(transf.iabt(predictions_oddeven$pred), 2), 
         ", 95% CI [", round(transf.iabt(predictions_oddeven$ci.lb), 2), ", ", 
         round(transf.iabt(predictions_oddeven$ci.ub), 2), "]", 
         ", 95% CR [", round(transf.iabt(predictions_oddeven$cr.lb), 2), ", ", 
         round(transf.iabt(predictions_oddeven$cr.ub), 2), "]") 

meta_heterogeneity <- 
  paste0("Heterogeneity tests: Q(df = ", fit_oddeven$k - 1, ") = ", round(fit_oddeven$QE, 2), 
         ", p ", ifelse(fit_oddeven$QEp < 0.0001, "< .0001", paste0("= ", as.character(round(fit_oddeven$QEp, 4)))),
         ", tau^2 = ", round(fit_oddeven$tau2, 2), 
         ", I^2 = ",   round(fit_oddeven$I2, 2),
         "%, H^2 = ",   round(fit_oddeven$H2, 2))

Meta effect: Meta analysis: k = 37, r_sb = 0.56, 95% CI [0.5, 0.61], 95% CR [0.39, 0.68].

Heterogeneity: Heterogeneity tests: Q(df = 36) = 49.7, p = 0.064, tau^2 = 0.02, I^2 = 17.88%, H^2 = 1.22.

Split-half reliability via first vs. second half of the task

Bonett transformations

# fit random Effects model 
fit_firstsecond <- data_D_scores_internal_consistency_firstsecondhalves %>%
  rma(yi   = yi, 
      vi   = vi, 
      data = .,
      slab = domain)

# make predictions 
predictions_firstsecond <-
  predict(fit_firstsecond, digits = 5) %>%
  as.data.frame() %>%
  round_df(2)

# plot
metafor::forest(fit_firstsecond,
                xlab = "Spearman-Brown correlation",
                addcred = TRUE,
                refline = FALSE,
                transf = transf.iabt,
                xlim = c(-1.4, 1.6),
                at = c(0, 0.25, 0.5, 0.75, 1),
                mlab = add_heterogeneity_metrics_to_forest(fit_firstsecond))
text(-1.4, 39, "Study", pos = 4)
text(1.6, 39, "Spearman-Brown correlation [95% CI]", pos = 2)

# summarize results
meta_effect <- 
  paste0("Meta analysis: k = ", fit_firstsecond$k, 
         ", r_sb = ",  round(transf.iabt(predictions_firstsecond$pred), 2), 
         ", 95% CI [", round(transf.iabt(predictions_firstsecond$ci.lb), 2), ", ", 
         round(transf.iabt(predictions_firstsecond$ci.ub), 2), "]", 
         ", 95% CR [", round(transf.iabt(predictions_firstsecond$cr.lb), 2), ", ", 
         round(transf.iabt(predictions_firstsecond$cr.ub), 2), "]") 

meta_heterogeneity <- 
  paste0("Heterogeneity tests: Q(df = ", fit_firstsecond$k - 1, ") = ", round(fit_firstsecond$QE, 2), 
         ", p ", ifelse(fit_firstsecond$QEp < 0.0001, "< .0001", paste0("= ", as.character(round(fit_firstsecond$QEp, 4)))),
         ", tau^2 = ", round(fit_firstsecond$tau2, 2), 
         ", I^2 = ",   round(fit_firstsecond$I2, 2),
         "%, H^2 = ",   round(fit_firstsecond$H2, 2))

Meta effect: Meta analysis: k = 37, r_sb = 0.52, 95% CI [0.47, 0.57], 95% CR [0.47, 0.57].

Heterogeneity: Heterogeneity tests: Q(df = 36) = 35.37, p = 0.4981, tau^2 = 0, I^2 = 0%, H^2 = 1.

Permutation-based split half correlations

ie an estimate of alpha through large number number of random half splits (see Parsons, Kruijt, & Fox. 2019). Estimate of alpha obtained via mean of the resampled Spearman-Brown corrected split half correlations, when calculating D1 scores for each half and sampling 5000 permutations.

Bonett transformations

# fit random Effects model 
fit_internal_consistency_permuted_estimates <- 
  data_D_scores_internal_consistency_permuted_estimates %>%
  rma(yi   = yi, 
      vi   = vi, 
      data = .,
      slab = domain)

# make predictions 
predictions_permutations <-
  predict(fit_internal_consistency_permuted_estimates, digits = 5) %>%
  as.data.frame() %>%
  round_df(2)

# plot
metafor::forest(fit_internal_consistency_permuted_estimates,
                xlab = bquote(paste("Cronbach's ", alpha)),
                addcred = TRUE,
                refline = FALSE,
                transf = transf.iabt,
                xlim = c(-1.4, 1.6),
                at = c(0, 0.25, 0.5, 0.75, 1),
                mlab = add_heterogeneity_metrics_to_forest(fit_internal_consistency_permuted_estimates))
text(-1.4, 39, "Study", pos = 4)
text(1.6, 39, bquote(paste("Cronbach's ", alpha, " [95% CI]")), pos = 2)

# summarize results
meta_effect <- 
  paste0("Meta analysis: k = ", fit_internal_consistency_permuted_estimates$k, 
         ", alpha = ",  round(transf.iabt(predictions_permutations$pred), 2), 
         ", 95% CI [", round(transf.iabt(predictions_permutations$ci.lb), 2), ", ", 
         round(transf.iabt(predictions_permutations$ci.ub), 2), "]", 
         ", 95% CR [", round(transf.iabt(predictions_permutations$cr.lb), 2), ", ", 
         round(transf.iabt(predictions_permutations$cr.ub), 2), "]") 

meta_heterogeneity <- 
  paste0("Heterogeneity tests: Q(df = ", fit_internal_consistency_permuted_estimates$k - 1, ") = ", round(fit_internal_consistency_permuted_estimates$QE, 2), 
         ", p ", ifelse(fit_internal_consistency_permuted_estimates$QEp < 0.0001, "< .0001", paste0("= ", as.character(round(fit_internal_consistency_permuted_estimates$QEp, 4)))),
         ", tau^2 = ", round(fit_internal_consistency_permuted_estimates$tau2, 2), 
         ", I^2 = ",   round(fit_internal_consistency_permuted_estimates$I2, 2),
         "%, H^2 = ",   round(fit_internal_consistency_permuted_estimates$H2, 2))

# save to disk for making pdf plot
write_rds(fit_internal_consistency_permuted_estimates, "models/fit_internal_consistency_permuted_estimates.rds")

Meta effect: Meta analysis: k = 37, alpha = 0.53, 95% CI [0.47, 0.59], 95% CR [0.3, 0.69].

Heterogeneity: Heterogeneity tests: Q(df = 36) = 52.76, p = 0.0353, tau^2 = 0.04, I^2 = 26.97%, H^2 = 1.37.

GOSH

Sexuality IRAP seems to be an outlier

if(!file.exists("models/fit_gosh_ic.rds")){
  
  fit_gosh_ic <- gosh(fit_internal_consistency_permuted_estimates, 
                      subsets = 5000,
                      parallel = "multicore",  # change to "snow" on windows
                      ncpus = 4)
  
  write_rds(fit_gosh_ic, "models/fit_gosh_ic.rds")
  
} else {
  
  fit_gosh_ic <- read_rds("models/fit_gosh_ic.rds")
  
}

data_for_plotting_ic <- 
  bind_cols(as.tibble(fit_gosh_ic$incl) %>%
              # the study to be separated is here listed by order (counting down from top of forest plot)
              select(include = "18"),  
            as.tibble(fit_gosh_ic$res)) %>%
  mutate(IRAP = ifelse(include, "Sexuality", "All other domains"),
         alpha = transf.iabt(estimate)) %>%
  # randomize rows to improve plotting
  sample_frac(size = 1)

# ggplot(data_for_plotting_ic, aes(alpha, I2)) +
#   geom_point(alpha = 0.4) +
#   scale_color_viridis_d(begin = 0.3, end = 0.7, direction = -1) +
#   theme_classic()

p1 <- 
  ggplot(data_for_plotting_ic, aes(alpha, I2, color = IRAP)) +
  geom_point(alpha = 0.5) +
  scale_color_viridis_d(begin = 0.35, end = 0.65, direction = -1) +
  theme_classic() + 
  xlab(expression("Internal consistency (" * alpha * ")")) +
  ylab(expression("Heterogeneity (" * italic("I")^"2" * ")")) +
  theme(legend.position = c(0.2, 0.85), 
        legend.background = element_rect(fill = scales::alpha("white", 0)))

plot_gosh_ic <- 
  ggMarginal(p1, 
             type = "densigram",
             size = 4,
             margins = "both",
             groupFill = TRUE)

plot_gosh_ic

write_rds(plot_gosh_ic, "models/plot_gosh_ic.rds")

data_for_plotting_ic %>%
  group_by(IRAP) %>%
  summarize(alpha_median = median(alpha, na.rm = TRUE),
            alpha_ci_lower = quantile(alpha, 0.025, na.rm = TRUE),
            alpha_ci_upper = quantile(alpha, 0.975, na.rm = TRUE),
            I2_median = median(I2, na.rm = TRUE),
            I2_ci_lower = quantile(I2, 0.025, na.rm = TRUE),
            I2_ci_upper = quantile(I2, 0.975, na.rm = TRUE),
            H2_median = median(H2, na.rm = TRUE),
            H2_ci_lower = quantile(H2, 0.025, na.rm = TRUE),
            H2_ci_upper = quantile(H2, 0.975, na.rm = TRUE)) %>%
  round_df(2) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
IRAP alpha_median alpha_ci_lower alpha_ci_upper I2_median I2_ci_lower I2_ci_upper H2_median H2_ci_lower H2_ci_upper
All other domains 0.53 0.46 0.60 26.52 0 65.58 1.36 1 2.91
Sexuality 0.53 0.46 0.59 15.16 0 61.86 1.18 1 2.62

Sensitivity analysis

excluding Sexuality IRAP as an outlier

# fit random Effects model 
fit_internal_consistency_permuted_estimates_sensitivity <- 
  data_D_scores_internal_consistency_permuted_estimates %>%
  filter(!str_detect(domain, "Sexuality")) %>%
  rma(yi   = yi, 
      vi   = vi, 
      data = .,
      slab = domain)

# make predictions 
predictions_permutations_sensitivity <-
  predict(fit_internal_consistency_permuted_estimates_sensitivity, digits = 5) %>%
  as.data.frame() %>%
  round_df(2)

# plot
metafor::forest(fit_internal_consistency_permuted_estimates_sensitivity,
                xlab = bquote(paste("Cronbach's ", alpha)),
                addcred = TRUE,
                refline = FALSE,
                transf = transf.iabt,
                xlim = c(-1.4, 1.6),
                at = c(0, 0.25, 0.5, 0.75, 1),
                mlab = add_heterogeneity_metrics_to_forest(fit_internal_consistency_permuted_estimates_sensitivity))
text(-1.4, 38, "Study", pos = 4)
text(1.6, 38, bquote(paste("Cronbach's ", alpha, " [95% CI]")), pos = 2)

# summarize results
meta_effect <- 
  paste0("Meta analysis: k = ", fit_internal_consistency_permuted_estimates_sensitivity$k, 
         ", alpha = ",  round(transf.iabt(predictions_permutations_sensitivity$pred), 2), 
         ", 95% CI [", round(transf.iabt(predictions_permutations_sensitivity$ci.lb), 2), ", ", 
         round(transf.iabt(predictions_permutations_sensitivity$ci.ub), 2), "]", 
         ", 95% CR [", round(transf.iabt(predictions_permutations_sensitivity$cr.lb), 2), ", ", 
         round(transf.iabt(predictions_permutations_sensitivity$cr.ub), 2), "]") 

meta_heterogeneity <- 
  paste0("Heterogeneity tests: Q(df = ", fit_internal_consistency_permuted_estimates_sensitivity$k - 1, ") = ", round(fit_internal_consistency_permuted_estimates_sensitivity$QE, 2), 
         ", p ", ifelse(fit_internal_consistency_permuted_estimates_sensitivity$QEp < 0.0001, "< .0001", paste0("= ", as.character(round(fit_internal_consistency_permuted_estimates_sensitivity$QEp, 4)))),
         ", tau^2 = ", round(fit_internal_consistency_permuted_estimates_sensitivity$tau2, 2), 
         ", I^2 = ",   round(fit_internal_consistency_permuted_estimates_sensitivity$I2, 2),
         "%, H^2 = ",   round(fit_internal_consistency_permuted_estimates_sensitivity$H2, 2))

# save to disk for making pdf plot
write_rds(fit_internal_consistency_permuted_estimates_sensitivity, "models/fit_internal_consistency_permuted_estimates_sensitivity.rds")

Meta effect: Meta analysis: k = 35, alpha = 0.5, 95% CI [0.45, 0.55], 95% CR [0.45, 0.55].

Heterogeneity: Heterogeneity tests: Q(df = 34) = 29.71, p = 0.6779, tau^2 = 0, I^2 = 0%, H^2 = 1.

GOSH

Sexuality IRAP seems to be an outlier

if(!file.exists("models/fit_gosh_ic_sensitivity.rds")){
  
  fit_gosh_ic_sensitivity <- gosh(fit_internal_consistency_permuted_estimates_sensitivity, 
                                  subsets = 5000,
                                  parallel = "multicore",  # change to "snow" on windows
                                  ncpus = 4)
  
  write_rds(fit_gosh_ic_sensitivity, "models/fit_gosh_ic_sensitivity.rds")
  
} else {
  
  fit_gosh_ic_sensitivity <- read_rds("models/fit_gosh_ic_sensitivity.rds")
  
}

data_for_plotting_ic_sensitivity <- 
  as.tibble(fit_gosh_ic_sensitivity$res) %>%
  mutate(Type = "Other IRAPs",
         alpha = transf.iabt(estimate)) %>%
  # randomize rows to improve plotting
  sample_frac(size = 1)

# ggplot(data_for_plotting_ic_sensitivity, aes(alpha, I2)) +
#   geom_point(alpha = 0.4) +
#   scale_color_viridis_d(begin = 0.3, end = 0.7, direction = -1) +
#   theme_classic()

p3 <- 
  ggplot(data_for_plotting_ic_sensitivity, aes(alpha, I2, color = Type)) +
  geom_point(alpha = 0.5) +
  scale_color_viridis_d(begin = 0.35, end = 0.65, direction = -1) +
  theme_classic() + 
  xlab(expression("Internal consistency (" * alpha * ")")) +
  ylab(expression("Heterogeneity (" * italic("I")^"2" * ")")) +
  theme(legend.position = c(0.2, 0.85)) +
  theme(legend.position = "none") 

plot_gosh_ic_sensitivity <- 
  ggMarginal(p3, 
             type = "densigram",
             size = 4,
             margins = "both",
             groupFill = TRUE)

plot_gosh_ic_sensitivity

write_rds(plot_gosh_ic_sensitivity, "models/plot_gosh_ic_sensitivity.rds")

data_for_plotting_ic_sensitivity %>%
  summarize(alpha_median = median(alpha, na.rm = TRUE),
            alpha_ci_lower = quantile(alpha, 0.025, na.rm = TRUE),
            alpha_ci_upper = quantile(alpha, 0.975, na.rm = TRUE),
            I2_median = median(I2, na.rm = TRUE),
            I2_ci_lower = quantile(I2, 0.025, na.rm = TRUE),
            I2_ci_upper = quantile(I2, 0.975, na.rm = TRUE),
            H2_median = median(H2, na.rm = TRUE),
            H2_ci_lower = quantile(H2, 0.025, na.rm = TRUE),
            H2_ci_upper = quantile(H2, 0.975, na.rm = TRUE)) %>%
  round_df(2) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
alpha_median alpha_ci_lower alpha_ci_upper I2_median I2_ci_lower I2_ci_upper H2_median H2_ci_lower H2_ci_upper
0.53 0.45 0.6 3.32 0 29.81 1.03 1 1.42

Test-retest reliability

I.e., Intraclass Correlation Coefficients.

“It has been argued that test-retest reliability should reflect absolute agreement, rather than consistency, between measurements (Koo & Li, 2016). For example, a perfect correlation between scores at two time points may occur also when there is a systematic difference between time points (i.e. a difference that is about equal for all participants).” (Parsons, Kruijt, & Fox, 2019). Absolute agreement therefore also takes within-participant changes into account in its denominator, where consistency does not.

# fit random Effects model 
fit_test_retest_icc <- data_D_scores_test_retest_icc %>%
  rma(yi   = ICC, 
      sei  = se, 
      data = .,
      slab = domain)

# make predictions 
predictions_test_retest_icc <-
  predict(fit_test_retest_icc, digits = 5) %>%
  as.data.frame() %>%
  round_df(2) 

# plot
metafor::forest(fit_test_retest_icc,
                xlab = "Absolute Agreement ICC",
                addcred = TRUE,
                refline = FALSE,
                xlim = c(-1.5, 1.7),
                at = c(-0.5, -0.25, 0, 0.25, 0.5, 0.75, 1),
                mlab = add_heterogeneity_metrics_to_forest(fit_test_retest_icc))
text(-1.5, 10, "Study", pos = 4)
text(1.7, 10, "ICC [95% CI]", pos = 2)

# summarize results
meta_effect <- 
  paste0("Meta analysis: k = ", fit_test_retest_icc$k, 
         ", ICC2 = ",  round(transf.ztor(predictions_test_retest_icc$pred), 2), 
         ", 95% CI [", round(transf.ztor(predictions_test_retest_icc$ci.lb), 2), ", ", 
         round(transf.ztor(predictions_test_retest_icc$ci.ub), 2), "]", 
         ", 95% CR [", round(transf.ztor(predictions_test_retest_icc$cr.lb), 2), ", ", 
         round(transf.ztor(predictions_test_retest_icc$cr.ub), 2), "]") 

meta_heterogeneity <- 
  paste0("Heterogeneity tests: Q(df = ", fit_test_retest_icc$k - 1, ") = ", round(fit_test_retest_icc$QE, 2), 
         ", p ", ifelse(fit_test_retest_icc$QEp < 0.0001, "< .0001", paste0("= ", as.character(round(fit_test_retest_icc$QEp, 4)))),
         ", tau^2 = ", round(fit_test_retest_icc$tau2, 2), 
         ", I^2 = ",   round(fit_test_retest_icc$I2, 2),
         "%, H^2 = ",   round(fit_test_retest_icc$H2, 2))

# write to disk for pdf plots
write_rds(fit_test_retest_icc, "models/fit_test_retest_icc.rds")

Meta effect: Meta analysis: k = 8, ICC2 = 0.2, 95% CI [0.05, 0.33], 95% CR [-0.15, 0.49].

Heterogeneity: Heterogeneity tests: Q(df = 7) = 21.44, p = 0.0032, tau^2 = 0.03, I^2 = 64.75%, H^2 = 2.84.

GOSH

if(!file.exists("models/fit_gosh_trt.rds")){
  
  fit_gosh_trt <- gosh(fit_test_retest_icc, 
                       parallel = "multicore", # change to "snow" on windows
                       ncpus = 4)
  
  write_rds(fit_gosh_trt, "models/fit_gosh_trt.rds")
  
} else {
  
  fit_gosh_trt <- read_rds("models/fit_gosh_trt.rds")
  
}

data_for_plotting_trt <- 
  as.tibble(fit_gosh_trt$res) %>%
  mutate(Type = "Other IRAPs",
         ICC = transf.ztor(estimate)) %>%
  # randomize rows to improve plotting
  sample_frac(size = 1)

# ggplot(data_for_plotting_ic, aes(alpha, I2)) +
#   geom_point(alpha = 0.4) +
#   scale_color_viridis_d(begin = 0.3, end = 0.7, direction = -1) +
#   theme_classic()

p2 <- 
  ggplot(data_for_plotting_trt, aes(ICC, I2, color = Type)) +
  geom_point(alpha = 0.8) +
  scale_color_viridis_d(begin = 0.35, end = 0.65, direction = -1) +
  theme_classic() + 
  xlab("Test-retest reliability (ICC)") +
  ylab(expression("Heterogeneity (" * italic("I")^"2" * ")")) +
  theme(legend.position = "none") 

plot_gosh_trt <- 
  ggMarginal(p2, 
             type = "densigram",
             size = 4,
             margins = "both",
             groupFill = TRUE)

plot_gosh_trt

write_rds(plot_gosh_trt, "models/plot_gosh_trt.rds")
  
data_for_plotting_trt %>%
  summarize(ICC_median = median(ICC, na.rm = TRUE),
            ICC_ci_lower = quantile(ICC, 0.025, na.rm = TRUE),
            ICC_ci_upper = quantile(ICC, 0.975, na.rm = TRUE),
            I2_median = median(I2, na.rm = TRUE),
            I2_ci_lower = quantile(I2, 0.025, na.rm = TRUE),
            I2_ci_upper = quantile(I2, 0.975, na.rm = TRUE),
            H2_median = median(H2, na.rm = TRUE),
            H2_ci_lower = quantile(H2, 0.025, na.rm = TRUE),
            H2_ci_upper = quantile(H2, 0.975, na.rm = TRUE)) %>%
  round_df(2) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
ICC_median ICC_ci_lower ICC_ci_upper I2_median I2_ci_lower I2_ci_upper H2_median H2_ci_lower H2_ci_upper
0.18 0 0.32 53.34 0 75.74 2.14 1 4.12

Power implications

Maximum observed correlation of two measures is a function of their true correlation and the reliability (i.e., self-correlation) of each measure.

\(r_{xy}=\frac{\rho_{xy}}{\sqrt{\rho_{xx}\rho_{yy}}}\)

Internal Consistency

observed_r <- function(true_r, reliability_a, reliability_b) {
  round(true_r * sqrt(reliability_a*reliability_b), 2)
}

max_cor <- observed_r(true_r = 1.0, reliability_a = transf.iabt(predictions_permutations_sensitivity$pred), reliability_b = 1.0)

medium_cor <- observed_r(true_r = 0.5, reliability_a = transf.iabt(predictions_permutations_sensitivity$pred), reliability_b = 1.0)

Given this meta estimate of reliability, assuming that the internal consistency of an external variable is perfect (1.0), the maximum correlation between the IRAP and that external variable (i.e., when true correlation is 1.0) is 0.71. For a true correlation of medium size (r = .5), the observed correlation would be 0.35.

Test-retest

max_cor <- observed_r(true_r = 1.0, reliability_a = transf.ztor(predictions_test_retest_icc$pred), reliability_b = 1.0)

medium_cor <- observed_r(true_r = 0.5, reliability_a = transf.iabt(predictions_test_retest_icc$pred), reliability_b = 1.0)

Given this meta estimate of reliability, assuming that the internal consistency of an external variable is perfect (1.0), the maximum correlation between the IRAP and that external variable (i.e., when true correlation is 1.0) is 0.44. For a true correlation of medium size (r = .5), the observed correlation would be 0.21.

Potential ways to improve reliability

Lengthening the task

Internal Consistency

The Spearman-Brown prediction formula can be rearranged to make prediction about how the length of the test would need to change to meet a goal reliability:

\(\rho_{xx'}^*=\frac {n\rho_{xx'}}{1+(n-1)\rho_{xx'}}\)

where \(\rho_{xx'}^*\) refers to the goal reliability, \(\rho_{xx'}\) refers to the current reliability, and \(n\) refers to the test length multiplier.

current_ic <- transf.iabt(predictions_permutations_sensitivity$pred)

goal_ic <- 0.70
ic_length_increase_.70 <- round((goal_ic*(1 - current_ic)) / (current_ic*(1 - goal_ic)), 1)

goal_ic <- 0.80
ic_length_increase_.80 <- round((goal_ic*(1 - current_ic)) / (current_ic*(1 - goal_ic)), 1)

goal_ic <- 0.90
ic_length_increase_.90 <- round((goal_ic*(1 - current_ic)) / (current_ic*(1 - goal_ic)), 1)

Using \(a\) = 0.5, the IRAP would have to be 2.3 times longer for it to have an internal consistency of >=.70, 4 times longer for it to have an internal consistency of >=.80, or 9.1 times longer for it to have an internal consistency of >=.90.

Test-retest

The Spearman-Brown prediction formula can be rearranged to make prediction about how the length of the test would need to change to meet a goal reliability:

\({\rho }_{xx'}^{*}={\frac {n{\rho }_{xx'}}{1+(n-1){\rho }_{xx'}}}\)

where \({\rho }_{xx'}^{*}\) refers to the goal reliability, \({\rho }_{xx'}\) refers to the current reliability, and \({n}\) refers to the test length multiplier.

current_test_retest <- transf.ztor(predictions_test_retest_icc$pred)

goal_test_retest <- 0.90
trt_length_increase_.90 <- round((goal_test_retest*(1 - current_test_retest)) / (current_test_retest*(1 - goal_test_retest)), 1)

goal_test_retest <- 0.80
trt_length_increase_.80 <- round((goal_test_retest*(1 - current_test_retest)) / (current_test_retest*(1 - goal_test_retest)), 1)

goal_test_retest <- 0.70
trt_length_increase_.70 <- round((goal_test_retest*(1 - current_test_retest)) / (current_test_retest*(1 - goal_test_retest)), 1)

Using the absolute agreement estimate of 0.2, the IRAP would have to be 9.5 times longer for it to have a test-retest of >=.70, 16.3 times longer for it to have a test-retest of >=.80, or 36.6 times longer for it to have a test-retest of >=.90.

Alternative scoring algorithm: A scores

Internal consistency using permutation approach, changing D scoring to A scoring. Compare results of meta analysis against permuted IC sensitivity meta above for apples-to-apples comparison.

# fit random Effects model 
fit_internal_consistency_permuted_estimates_sensitivity_A_scores <- 
  data_A_scores_internal_consistency_permuted_estimates %>%
  filter(!str_detect(domain, "Sexuality")) %>%
  rma(yi   = yi, 
      vi   = vi, 
      data = .,
      slab = domain)

# make predictions 
predictions_permutations_sensitivity_A_scores <-
  predict(fit_internal_consistency_permuted_estimates_sensitivity_A_scores, digits = 5) %>%
  as.data.frame() %>%
  round_df(2)

# plot
metafor::forest(fit_internal_consistency_permuted_estimates_sensitivity_A_scores,
                xlab = bquote(paste("Cronbach's ", alpha)),
                addcred = TRUE,
                refline = FALSE,
                transf = transf.iabt,
                xlim = c(-1.4, 1.6),
                at = c(0, 0.25, 0.5, 0.75, 1),
                mlab = add_heterogeneity_metrics_to_forest(fit_internal_consistency_permuted_estimates_sensitivity_A_scores))
text(-1.4, 38, "Study", pos = 4)
text(1.6, 38, bquote(paste("Cronbach's ", alpha, " [95% CI]")), pos = 2)

# summarize results
meta_effect <- 
  paste0("Meta analysis: k = ", fit_internal_consistency_permuted_estimates_sensitivity_A_scores$k, 
         ", alpha = ",  round(transf.iabt(predictions_permutations_sensitivity_A_scores$pred), 2), 
         ", 95% CI [", round(transf.iabt(predictions_permutations_sensitivity_A_scores$ci.lb), 2), ", ", 
         round(transf.iabt(predictions_permutations_sensitivity_A_scores$ci.ub), 2), "]", 
         ", 95% CR [", round(transf.iabt(predictions_permutations_sensitivity_A_scores$cr.lb), 2), ", ", 
         round(transf.iabt(predictions_permutations_sensitivity_A_scores$cr.ub), 2), "]") 

meta_heterogeneity <- 
  paste0("Heterogeneity tests: Q(df = ", fit_internal_consistency_permuted_estimates_sensitivity_A_scores$k - 1, ") = ", round(fit_internal_consistency_permuted_estimates_sensitivity_A_scores$QE, 2), 
         ", p ", ifelse(fit_internal_consistency_permuted_estimates_sensitivity_A_scores$QEp < 0.0001, "< .0001", paste0("= ", as.character(round(fit_internal_consistency_permuted_estimates_sensitivity_A_scores$QEp, 4)))),
         ", tau^2 = ", round(fit_internal_consistency_permuted_estimates_sensitivity_A_scores$tau2, 2), 
         ", I^2 = ",   round(fit_internal_consistency_permuted_estimates_sensitivity_A_scores$I2, 2),
         "%, H^2 = ",   round(fit_internal_consistency_permuted_estimates_sensitivity_A_scores$H2, 2))

# save to disk for making pdf plot
write_rds(fit_internal_consistency_permuted_estimates_sensitivity_A_scores, "models/fit_internal_consistency_permuted_estimates_sensitivity_A_scores.rds")

Meta effect: Meta analysis: k = 35, alpha = 0.55, 95% CI [0.49, 0.6], 95% CR [0.37, 0.67].

Heterogeneity: Heterogeneity tests: Q(df = 34) = 38.11, p = 0.2878, tau^2 = 0.02, I^2 = 18.64%, H^2 = 1.23.

Use only either fixed or moving response options

Internal consistency using permutation approach, moderator meta analysis between response option locations between domains.

# fit random Effects model 
fit_internal_consistency_permuted_estimates_sensitivity_response_option_locations <- 
  data_D_scores_internal_consistency_permuted_estimates %>%
  filter(!str_detect(domain, "Sexuality") &
           !is.na(response_option_locations)) %>%
  rma(yi   = yi, 
      vi   = vi, 
      mods = ~response_option_locations,
      data = .,
      slab = paste(domain, response_option_locations))

fit_internal_consistency_permuted_estimates_sensitivity_response_option_locations
## 
## Mixed-Effects Model (k = 28; tau^2 estimator: REML)
## 
## tau^2 (estimated amount of residual heterogeneity):     0 (SE = 0.0208)
## tau (square root of estimated tau^2 value):             0
## I^2 (residual heterogeneity / unaccounted variability): 0.00%
## H^2 (unaccounted variability / sampling variability):   1.00
## R^2 (amount of heterogeneity accounted for):            0.00%
## 
## Test for Residual Heterogeneity:
## QE(df = 26) = 21.6729, p-val = 0.7065
## 
## Test of Moderators (coefficient 2):
## QM(df = 1) = 3.1730, p-val = 0.0749
## 
## Model Results:
## 
##                                  estimate      se     zval    pval    ci.lb 
## intrcpt                            0.8428  0.1061   7.9395  <.0001   0.6347 
## response_option_locationsMoving   -0.2213  0.1243  -1.7813  0.0749  -0.4649 
##                                   ci.ub 
## intrcpt                          1.0508  *** 
## response_option_locationsMoving  0.0222    . 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
transf.iabt(c(0.8352, 0.8352-0.2163))
## [1] 0.5662123 0.4614635

Use only one block order

Internal consistency using permutation approach, multilevel moderator meta analysis between block orders for the domains that have both.

# fit random Effects model 
fit_internal_consistency_permuted_estimates_by_block_order <- 
  data_D_scores_internal_consistency_permuted_estimates_by_block_order %>%
  filter(!str_detect(domain, "Sexuality")) %>%
  rma(yi   = yi, 
      vi   = vi, 
      mods = ~block_order,
      data = .,
      slab = paste(domain, block_order))

fit_internal_consistency_permuted_estimates_by_block_order
## 
## Mixed-Effects Model (k = 40; tau^2 estimator: REML)
## 
## tau^2 (estimated amount of residual heterogeneity):     0 (SE = 0.0427)
## tau (square root of estimated tau^2 value):             0
## I^2 (residual heterogeneity / unaccounted variability): 0.00%
## H^2 (unaccounted variability / sampling variability):   1.00
## R^2 (amount of heterogeneity accounted for):            0.00%
## 
## Test for Residual Heterogeneity:
## QE(df = 38) = 21.2803, p-val = 0.9870
## 
## Test of Moderators (coefficient 2):
## QM(df = 1) = 0.5091, p-val = 0.4755
## 
## Model Results:
## 
##                   estimate      se    zval    pval    ci.lb   ci.ub 
## intrcpt             0.5769  0.1036  5.5711  <.0001   0.3740  0.7799  *** 
## block_orderincon    0.1092  0.1530  0.7135  0.4755  -0.1908  0.4092      
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
transf.iabt(c(0.5618, 0.5618+0.1483))
## [1] 0.4298182 0.5084050

Session info

sessionInfo()
## R version 4.0.0 (2020-04-24)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.4
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_IE.UTF-8/en_IE.UTF-8/en_IE.UTF-8/C/en_IE.UTF-8/en_IE.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] scales_1.1.1     ggExtra_0.9      psych_1.9.12.31  kableExtra_1.1.0
##  [5] knitr_1.28       metafor_2.4-0    Matrix_1.2-18    forcats_0.5.0   
##  [9] stringr_1.4.0    dplyr_0.8.5      purrr_0.3.4      readr_1.3.1     
## [13] tidyr_1.0.3      tibble_3.0.1     ggplot2_3.3.0    tidyverse_1.3.0 
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.4.6      lubridate_1.7.8   lattice_0.20-41   assertthat_0.2.1 
##  [5] digest_0.6.25     mime_0.9          R6_2.4.1          cellranger_1.1.0 
##  [9] backports_1.1.6   reprex_0.3.0      evaluate_0.14     highr_0.8        
## [13] httr_1.4.1        pillar_1.4.4      rlang_0.4.6       readxl_1.3.1     
## [17] rstudioapi_0.11   miniUI_0.1.1.1    rmarkdown_2.1     labeling_0.3     
## [21] webshot_0.5.2     munsell_0.5.0     shiny_1.4.0.2     broom_0.5.6      
## [25] httpuv_1.5.2      compiler_4.0.0    modelr_0.1.7      xfun_0.13        
## [29] pkgconfig_2.0.3   mnormt_1.5-7      htmltools_0.4.0   tidyselect_1.0.0 
## [33] fansi_0.4.1       viridisLite_0.3.0 later_1.0.0       crayon_1.3.4     
## [37] dbplyr_1.4.3      withr_2.2.0       grid_4.0.0        xtable_1.8-4     
## [41] nlme_3.1-147      jsonlite_1.6.1    gtable_0.3.0      lifecycle_0.2.0  
## [45] DBI_1.1.0         magrittr_1.5      cli_2.0.2         stringi_1.4.6    
## [49] farver_2.0.3      promises_1.1.0    fs_1.4.1          xml2_1.3.2       
## [53] ellipsis_0.3.0    generics_0.0.2    vctrs_0.2.4       tools_4.0.0      
## [57] glue_1.4.0        hms_0.5.3         fastmap_1.0.1     parallel_4.0.0   
## [61] yaml_2.2.1        colorspace_1.4-1  rvest_0.3.5       haven_2.2.0